library(rstan)
## Warning: package 'rstan' was built under R version 4.3.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.3.3
##
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(rstanarm)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.3
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
##
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
##
## loo
library(bayesplot)
## Warning: package 'bayesplot' was built under R version 4.3.3
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
library(tidyr) # for pivot_longer
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:rstan':
##
## extract
library(dplyr) # for %>%
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(truncnorm) # for rnorm with minimum and maximum values
library(loo)
## Warning: package 'loo' was built under R version 4.3.3
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
##
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
##
## loo
library(lubridate) # for simplifying working with dates
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(posterior) # for checking warnings of stan model
## Warning: package 'posterior' was built under R version 4.3.3
## This is posterior version 1.6.1
##
## Attaching package: 'posterior'
## The following object is masked from 'package:bayesplot':
##
## rhat
## The following objects are masked from 'package:rstan':
##
## ess_bulk, ess_tail
## The following objects are masked from 'package:stats':
##
## mad, sd, var
## The following objects are masked from 'package:base':
##
## %in%, match
# Custom palette: 9 well-separated colours, one per nation, because the
# built-in palettes don't distinguish 9 groups clearly enough
custom_palette <- c("#9F002E", "#B23AEE", "#FF50FF", "#FF7F00", "#FFB900", "#00EEEE", "#4EEE94","#458B00", "#4876FF")
# Load the dataset
#install.packages("mlmRev")
library(mlmRev)
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.3.3
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Load the malignant-melanoma mortality dataset from mlmRev
data("Mmmec")
# Check for missing values (per-column NA counts; all zero, see output below)
colSums(is.na(Mmmec))
## nation region county deaths expected uvb
## 0 0 0 0 0 0
# Summarize statistics about deaths overall
summary(Mmmec$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.00 14.50 27.83 31.00 313.00
# 1st quartile = 8: 25% of the observations have at most 8 deaths
# 3rd quartile = 31: 75% of the observations have at most 31 deaths
# Total deaths per nation (counties aggregated within each nation)
deaths_by_country <- aggregate(deaths ~ nation, data = Mmmec, sum)
print(deaths_by_country)
## nation deaths
## 1 Belgium 449
## 2 W.Germany 2949
## 3 Denmark 681
## 4 France 1495
## 5 UK 2179
## 6 Italy 1462
## 7 Ireland 67
## 8 Luxembourg 23
## 9 Netherlands 546
# Summarize UVB exposure (mean ~ 0 suggests the covariate is already centered)
summary(Mmmec$uvb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.900200 -4.158400 -0.886400 0.000204 3.275525 13.359000
# Exploratory analysis
# Histograms of the distributions of deaths and expected deaths
# Observed deaths only
hist_deaths <- ggplot(Mmmec, aes(x = deaths)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  ggtitle("Distribution of Deaths")
hist_deaths

ggsave(file = "images/distribution_deaths.pdf", plot = hist_deaths, width = 18, height = 10.5)
# Expected deaths only
hist_expected <- ggplot(Mmmec, aes(x = expected)) +
  geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
  ggtitle("Distribution of Expected Deaths")
hist_expected

ggsave(file = "images/distribution_expecteddeaths.pdf", plot = hist_expected, width = 18, height = 10.5)
# Both deaths and expected deaths
# Reshape to long format so both count types can share one histogram
Mmmec_long <- Mmmec %>%
  pivot_longer(cols = c(deaths, expected), names_to = "type", values_to = "value")
# Overlapped (position = "identity") histogram with transparency (alpha = 0.6).
# The labels vector is now *named* so each display label is tied to its fill
# level explicitly instead of relying on the alphabetical break order.
ggplot(Mmmec_long, aes(x = value, fill = type)) +
  geom_histogram(binwidth = 1, alpha = 0.6, position = "identity", color = "black") +
  ggtitle("Distribution of Deaths and Expected Deaths") +
  xlab("Deaths / Expected Deaths") +
  scale_fill_manual(values = c("deaths" = "red", "expected" = "yellow"),
                    labels = c("deaths" = "Deaths", "expected" = "Expected Deaths"))

ggsave(file = "images/distribution_deaths_expected.pdf", width = 18, height = 10.5)
# Scatter plots of deaths vs UVB exposure
# A single pooled regression line across all nations
scatter_pooled <- ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", color = "blue") +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()
scatter_pooled
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file = "images/scatter_deaths_vs_uvb.pdf", plot = scatter_pooled, width = 9.3, height = 7)
## `geom_smooth()` using formula = 'y ~ x'
# One regression line per nation, all on a single panel
scatter_by_nation <- ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_smooth(method = "lm", se = FALSE) + # one least-squares line per nation
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure") +
  geom_jitter()
scatter_by_nation
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file = "images/scatter_deaths_vs_uvb_regression_by_nation.pdf", plot = scatter_by_nation, width = 9.3, height = 7)
## `geom_smooth()` using formula = 'y ~ x'
# Facet by nation so the per-nation fits are less chaotic to read
scatter_facets <- ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  facet_wrap(~ nation) +
  scale_colour_manual(values = custom_palette) +
  ggtitle("Deaths VS UVB Exposure by Nation") +
  theme(legend.position = "none") # facet strips already identify the nation
scatter_facets
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file = "images/scatter_deaths_vs_uvb_by_nation.pdf", plot = scatter_facets, width = 8, height = 7)
## `geom_smooth()` using formula = 'y ~ x'
# Boxplots of deaths by nation (custom_palette supplies one fill per box)
nation_boxplot <- ggplot(Mmmec, aes(x = nation, y = deaths)) +
  geom_boxplot(fill = custom_palette) +
  ggtitle("Deaths across counties in nations")
nation_boxplot

ggsave(file = "images/boxplot_deaths_by_nation.pdf", plot = nation_boxplot, width = 8, height = 7)
# Generalized linear mixed model with frequentist approach (hierarchical model)
# (1 | k) includes a varying intercept for each level of k;
# offset = log(expected) adjusts the Poisson rate for the expected number of deaths.
# Arguments are now named explicitly instead of relying on positional matching.
M1 <- glmer(deaths ~ uvb + (1 | region) + (1 | nation),
            data = Mmmec, family = poisson, offset = log(expected))
summary(M1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: deaths ~ uvb + (1 | region) + (1 | nation)
## Data: Mmmec
## Offset: log(expected)
##
## AIC BIC logLik -2*log(L) df.resid
## 2198.7 2214.2 -1095.3 2190.7 350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9440 -0.7788 -0.0071 0.6277 3.9102
##
## Random effects:
## Groups Name Variance Std.Dev.
## region (Intercept) 0.04829 0.2198
## nation (Intercept) 0.13708 0.3702
## Number of obs: 354, groups: region, 78; nation, 9
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.06398 0.13351 -0.479 0.6318
## uvb -0.02822 0.01139 -2.478 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## uvb 0.185
# Bayes approach relying on HMC sampling from the posterior distribution
# (same formula and offset as M1; arguments named explicitly for clarity)
M1.rstanarm <- stan_glmer(deaths ~ uvb + (1 | region) + (1 | nation),
                          data = Mmmec, family = poisson, offset = log(expected))
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 6.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.64 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 4.498 seconds (Warm-up)
## Chain 1: 3.279 seconds (Sampling)
## Chain 1: 7.777 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 3.3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.33 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 4.536 seconds (Warm-up)
## Chain 2: 3.223 seconds (Sampling)
## Chain 2: 7.759 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 3.9e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 4.485 seconds (Warm-up)
## Chain 3: 2.986 seconds (Sampling)
## Chain 3: 7.471 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 2.9e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 3.937 seconds (Warm-up)
## Chain 4: 3.272 seconds (Sampling)
## Chain 4: 7.209 seconds (Total)
## Chain 4:
print(M1.rstanarm) # compact stanreg summary: posterior medians and MAD_SDs
## stan_glmer
## family: poisson [log]
## formula: deaths ~ uvb + (1 | region) + (1 | nation)
## observations: 354
## ------
## Median MAD_SD
## (Intercept) -0.1 0.2
## uvb 0.0 0.0
##
## Error terms:
## Groups Name Std.Dev.
## region (Intercept) 0.23
## nation (Intercept) 0.48
## Num. levels: region 78, nation 9
##
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg
summary(M1.rstanarm) # full posterior summaries plus MCMC diagnostics (mcse, Rhat, n_eff)
##
## Model Info:
## function: stan_glmer
## family: poisson [log]
## formula: deaths ~ uvb + (1 | region) + (1 | nation)
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 354
## groups: region (78), nation (9)
##
## Estimates:
## mean sd 10% 50% 90%
## (Intercept) -0.1 0.2 -0.3 -0.1 0.1
## uvb 0.0 0.0 0.0 0.0 0.0
## b[(Intercept) region:1] 0.4 0.1 0.2 0.4 0.6
## b[(Intercept) region:2] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:3] -0.5 0.1 -0.6 -0.4 -0.3
## b[(Intercept) region:4] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:5] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:6] 0.4 0.1 0.2 0.4 0.5
## b[(Intercept) region:7] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:8] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:9] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:10] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:11] -0.3 0.1 -0.4 -0.3 -0.2
## b[(Intercept) region:12] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:13] -0.2 0.1 -0.3 -0.2 0.0
## b[(Intercept) region:14] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:15] 0.2 0.1 0.0 0.2 0.4
## b[(Intercept) region:16] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:17] -0.1 0.1 -0.3 -0.1 0.0
## b[(Intercept) region:18] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:19] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:20] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:21] -0.2 0.1 -0.4 -0.2 0.0
## b[(Intercept) region:22] 0.0 0.1 -0.1 0.1 0.2
## b[(Intercept) region:23] 0.2 0.1 0.1 0.2 0.3
## b[(Intercept) region:24] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:25] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:27] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:28] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:29] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:30] 0.1 0.1 0.0 0.1 0.3
## b[(Intercept) region:31] 0.1 0.2 -0.1 0.1 0.3
## b[(Intercept) region:32] -0.2 0.1 -0.3 -0.2 0.0
## b[(Intercept) region:33] 0.0 0.1 -0.2 0.0 0.1
## b[(Intercept) region:34] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:35] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:36] -0.4 0.1 -0.6 -0.4 -0.2
## b[(Intercept) region:37] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:38] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:39] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:40] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:41] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:42] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:43] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:44] 0.3 0.1 0.2 0.3 0.4
## b[(Intercept) region:45] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:46] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:47] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:48] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:49] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:50] -0.2 0.1 -0.4 -0.2 -0.1
## b[(Intercept) region:51] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:52] -0.1 0.2 -0.3 -0.1 0.1
## b[(Intercept) region:53] -0.4 0.2 -0.6 -0.4 -0.2
## b[(Intercept) region:54] -0.6 0.1 -0.7 -0.6 -0.4
## b[(Intercept) region:55] 0.2 0.1 0.1 0.2 0.3
## b[(Intercept) region:56] 0.4 0.1 0.2 0.3 0.5
## b[(Intercept) region:57] 0.1 0.1 0.0 0.1 0.3
## b[(Intercept) region:58] 0.2 0.1 0.0 0.2 0.4
## b[(Intercept) region:59] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:60] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:61] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:62] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:63] -0.2 0.1 -0.4 -0.2 -0.1
## b[(Intercept) region:64] -0.1 0.1 -0.3 -0.1 0.1
## b[(Intercept) region:65] -0.2 0.1 -0.4 -0.2 0.0
## b[(Intercept) region:66] 0.2 0.1 0.0 0.2 0.3
## b[(Intercept) region:67] 0.1 0.2 -0.1 0.1 0.3
## b[(Intercept) region:68] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:69] 0.1 0.2 -0.1 0.1 0.4
## b[(Intercept) region:70] 0.1 0.1 0.0 0.1 0.3
## b[(Intercept) region:71] -0.1 0.2 -0.4 -0.1 0.1
## b[(Intercept) region:72] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:73] 0.0 0.2 -0.3 0.0 0.2
## b[(Intercept) region:74] 0.0 0.2 -0.3 0.0 0.3
## b[(Intercept) region:75] 0.0 0.2 -0.3 0.0 0.3
## b[(Intercept) region:76] -0.1 0.1 -0.3 -0.1 0.1
## b[(Intercept) region:77] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:78] 0.2 0.1 0.1 0.2 0.4
## b[(Intercept) region:79] -0.1 0.1 -0.3 -0.1 0.0
## b[(Intercept) nation:Belgium] -0.1 0.2 -0.3 -0.1 0.2
## b[(Intercept) nation:W.Germany] 0.5 0.2 0.3 0.5 0.7
## b[(Intercept) nation:Denmark] 0.6 0.2 0.4 0.6 0.9
## b[(Intercept) nation:France] -0.5 0.2 -0.7 -0.5 -0.3
## b[(Intercept) nation:UK] -0.1 0.2 -0.3 -0.1 0.1
## b[(Intercept) nation:Italy] 0.0 0.2 -0.3 0.0 0.2
## b[(Intercept) nation:Ireland] -0.5 0.2 -0.8 -0.5 -0.2
## b[(Intercept) nation:Luxembourg] 0.0 0.3 -0.3 0.0 0.4
## b[(Intercept) nation:Netherlands] 0.1 0.2 -0.2 0.1 0.3
## Sigma[region:(Intercept),(Intercept)] 0.1 0.0 0.0 0.1 0.1
## Sigma[nation:(Intercept),(Intercept)] 0.2 0.2 0.1 0.2 0.4
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 27.8 0.4 27.3 27.8 28.4
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.0 1.0 1228
## uvb 0.0 1.0 1768
## b[(Intercept) region:1] 0.0 1.0 3322
## b[(Intercept) region:2] 0.0 1.0 3000
## b[(Intercept) region:3] 0.0 1.0 3220
## b[(Intercept) region:4] 0.0 1.0 1539
## b[(Intercept) region:5] 0.0 1.0 1624
## b[(Intercept) region:6] 0.0 1.0 2127
## b[(Intercept) region:7] 0.0 1.0 4507
## b[(Intercept) region:8] 0.0 1.0 2435
## b[(Intercept) region:9] 0.0 1.0 1802
## b[(Intercept) region:10] 0.0 1.0 1556
## b[(Intercept) region:11] 0.0 1.0 1481
## b[(Intercept) region:12] 0.0 1.0 1963
## b[(Intercept) region:13] 0.0 1.0 3509
## b[(Intercept) region:14] 0.0 1.0 2059
## b[(Intercept) region:15] 0.0 1.0 2352
## b[(Intercept) region:16] 0.0 1.0 2604
## b[(Intercept) region:17] 0.0 1.0 2493
## b[(Intercept) region:18] 0.0 1.0 4984
## b[(Intercept) region:19] 0.0 1.0 4412
## b[(Intercept) region:20] 0.0 1.0 5063
## b[(Intercept) region:21] 0.0 1.0 5680
## b[(Intercept) region:22] 0.0 1.0 5168
## b[(Intercept) region:23] 0.0 1.0 3901
## b[(Intercept) region:24] 0.0 1.0 4665
## b[(Intercept) region:25] 0.0 1.0 6298
## b[(Intercept) region:27] 0.0 1.0 5785
## b[(Intercept) region:28] 0.0 1.0 4679
## b[(Intercept) region:29] 0.0 1.0 2712
## b[(Intercept) region:30] 0.0 1.0 3792
## b[(Intercept) region:31] 0.0 1.0 6634
## b[(Intercept) region:32] 0.0 1.0 4631
## b[(Intercept) region:33] 0.0 1.0 4573
## b[(Intercept) region:34] 0.0 1.0 3733
## b[(Intercept) region:35] 0.0 1.0 4532
## b[(Intercept) region:36] 0.0 1.0 5014
## b[(Intercept) region:37] 0.0 1.0 5474
## b[(Intercept) region:38] 0.0 1.0 2830
## b[(Intercept) region:39] 0.0 1.0 3415
## b[(Intercept) region:40] 0.0 1.0 3186
## b[(Intercept) region:41] 0.0 1.0 2591
## b[(Intercept) region:42] 0.0 1.0 3156
## b[(Intercept) region:43] 0.0 1.0 2312
## b[(Intercept) region:44] 0.0 1.0 1589
## b[(Intercept) region:45] 0.0 1.0 2064
## b[(Intercept) region:46] 0.0 1.0 2252
## b[(Intercept) region:47] 0.0 1.0 2416
## b[(Intercept) region:48] 0.0 1.0 2660
## b[(Intercept) region:49] 0.0 1.0 2294
## b[(Intercept) region:50] 0.0 1.0 4753
## b[(Intercept) region:51] 0.0 1.0 6778
## b[(Intercept) region:52] 0.0 1.0 5489
## b[(Intercept) region:53] 0.0 1.0 4433
## b[(Intercept) region:54] 0.0 1.0 3852
## b[(Intercept) region:55] 0.0 1.0 3255
## b[(Intercept) region:56] 0.0 1.0 3821
## b[(Intercept) region:57] 0.0 1.0 3798
## b[(Intercept) region:58] 0.0 1.0 4444
## b[(Intercept) region:59] 0.0 1.0 2107
## b[(Intercept) region:60] 0.0 1.0 5193
## b[(Intercept) region:61] 0.0 1.0 7668
## b[(Intercept) region:62] 0.0 1.0 2466
## b[(Intercept) region:63] 0.0 1.0 3849
## b[(Intercept) region:64] 0.0 1.0 5345
## b[(Intercept) region:65] 0.0 1.0 3294
## b[(Intercept) region:66] 0.0 1.0 3960
## b[(Intercept) region:67] 0.0 1.0 5081
## b[(Intercept) region:68] 0.0 1.0 5793
## b[(Intercept) region:69] 0.0 1.0 5740
## b[(Intercept) region:70] 0.0 1.0 2765
## b[(Intercept) region:71] 0.0 1.0 5578
## b[(Intercept) region:72] 0.0 1.0 4949
## b[(Intercept) region:73] 0.0 1.0 5335
## b[(Intercept) region:74] 0.0 1.0 7432
## b[(Intercept) region:75] 0.0 1.0 4392
## b[(Intercept) region:76] 0.0 1.0 3620
## b[(Intercept) region:77] 0.0 1.0 3155
## b[(Intercept) region:78] 0.0 1.0 2568
## b[(Intercept) region:79] 0.0 1.0 3060
## b[(Intercept) nation:Belgium] 0.0 1.0 1643
## b[(Intercept) nation:W.Germany] 0.0 1.0 1340
## b[(Intercept) nation:Denmark] 0.0 1.0 1539
## b[(Intercept) nation:France] 0.0 1.0 1297
## b[(Intercept) nation:UK] 0.0 1.0 1230
## b[(Intercept) nation:Italy] 0.0 1.0 1291
## b[(Intercept) nation:Ireland] 0.0 1.0 1890
## b[(Intercept) nation:Luxembourg] 0.0 1.0 2643
## b[(Intercept) nation:Netherlands] 0.0 1.0 1535
## Sigma[region:(Intercept),(Intercept)] 0.0 1.0 1367
## Sigma[nation:(Intercept),(Intercept)] 0.0 1.0 1746
## mean_PPD 0.0 1.0 3915
## log-posterior 0.3 1.0 914
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Extract Leave-One-Out Cross-Validation (PSIS-LOO) estimates for model comparison
loo_M1rs <- loo(M1.rstanarm)
## Warning: Found 14 observations with a pareto_k > 0.7. With this many problematic observations we recommend calling 'kfold' with argument 'K=10' to perform 10-fold cross-validation rather than LOO.
print(loo_M1rs)
##
## Computed from 4000 by 354 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -1069.3 25.7
## p_loo 77.4 8.5
## looic 2138.6 51.3
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.4, 2.1]).
##
## Pareto k diagnostic values:
## Count Pct. Min. ESS
## (-Inf, 0.7] (good) 340 96.0% 190
## (0.7, 1] (bad) 10 2.8% <NA>
## (1, Inf) (very bad) 4 1.1% <NA>
## See help('pareto-k-diagnostic') for details.
# Using 10-fold CV as suggested by the warning (some observations have too high pareto_k)
# produced a warning about 1 divergent transition after warmup:
#kfold_result <- kfold(M1.rstanarm, K = 10)
# But since Rhat=1 and ESS (n_eff)>1000 for every parameter, the resulting posterior is often good enough to move forward
# Posterior predictive checks
# Matrix of replicated outcomes (rows = posterior draws) - see ?posterior_predict
y_rep <- posterior_predict(M1.rstanarm)
# Density overlay: observed outcome vs replicated datasets
pp_check(M1.rstanarm)

ggsave(file="images/densities_M1rstanarm.pdf", width=8,height=7)
# Plot the credible intervals
# Parameter labels for the mcmc_intervals plot, built to match the order of
# as.matrix(M1.rstanarm): fixed effects first, then the 78 region intercepts
# (region level 26 is absent), then the 9 nation intercepts, then the two
# group-level standard deviations. Built vectorized instead of growing the
# vector element-by-element in loops.
beta_names <- c(paste0("beta^", c("uvb")), "gl.intercept")
region_ids <- setdiff(1:79, 26) # region 26 is missing
alpha_names <- c(
  paste0("region[", region_ids, "]"),
  "Belgium", "WG", "Denmark", "France", "UK", "Italy",
  "Ireland", "Luxembourg", "Netherlands",
  "sigma[region]", "sigma[nation]" # parsed later into plotmath sigma subscripts
)
# Posterior draws as a plain matrix (one column per parameter)
posterior_M1 <- as.matrix(M1.rstanarm)
# Credible-interval plot; y labels replaced with the parsed plotmath names built above
mcmc_intervals(posterior_M1, regex_pars=c( "uvb",
"(Intercept)", "b"))+
xaxis_text(on =TRUE, size=rel(1.9))+
yaxis_text(on =TRUE, size=rel(1.4))+
scale_y_discrete(labels = ((parse(text= c(beta_names, alpha_names)))))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggsave(file="images/logistic_credible_intervals.pdf", width=5, height=length(alpha_names) * 0.25)
# Posterior marginal density with 50% interval for the 'fixed-effect' uvb coefficient
mcmc_areas(posterior_M1, regex_pars = c("uvb"))+
xaxis_text(on =TRUE, size=rel(1.9))+
yaxis_text(on =TRUE, size=rel(1.4))

ggsave(file = "images/logistic_fixed_effects.pdf", width=8, height=7)
# Plot the random effects (posterior mean +- s.e.) for the region intercepts
# Sort once and reuse both the sorted values ($x) and the permutation ($ix)
# instead of computing the same sort twice.
region_sort <- sort(coef(M1.rstanarm)$region[, 1], index.return = TRUE)
int_ord <- region_sort$x
ord <- region_sort$ix
region.abbr <- levels(Mmmec$region) # NOTE(review): assumes factor levels align with coef() rows - verify
region.abbr.ord <- region.abbr[ord]
se_ord <- M1.rstanarm$ses[ord] # NOTE(review): assumes the leading entries of $ses are the region terms - verify
par(xaxt = "n", mfrow = c(1, 1), mar = c(5, 2, 2, 1))
plot(seq_along(int_ord), int_ord, ylim = c(-1.4, 1.4), pch = 19, bg = 2, xlab = "Regions",
     ylab = "Intercepts", cex.main = 1.9, cex.lab = 1.9)
for (h in seq_along(int_ord)) {
  # Vertical +-1 s.e. bar around each posterior mean
  segments(h, int_ord[h] - se_ord[h], h, int_ord[h] + se_ord[h], col = "red")
  # Alternate label placement above/below the bar so adjacent labels don't
  # overlap. The original re-defined an is.wholenumber() helper on every
  # iteration just to test whether h/2 is whole, i.e. whether h is even.
  if (h %% 2 == 0) {
    text(h, int_ord[h] + se_ord[h] + 0.1, region.abbr.ord[h], cex = 1.1)
  } else {
    text(h, int_ord[h] - se_ord[h] - 0.1, region.abbr.ord[h], cex = 1.1)
  }
}

ggsave(file="images/random_effects_log.pdf", height=7, width=length(int_ord) * 0.4)
# Empirical cumulative distribution function: observed outcome vs 200 replicated datasets
ppc_ecdf_overlay(y = M1.rstanarm$y, y_rep[1:200,])+
xaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))

ggsave(file="images/ecdf_M1.rstanarm.pdf", width=8, height=7)
# Proportion of zeros in a vector; looked up by name ("prop_zero") as a
# posterior-predictive-check test statistic below
prop_zero <- function(x) {
  sum(x == 0) / length(x)
}
# PPC: does the model reproduce the observed proportion of zero-death counties?
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat = "prop_zero")+
xaxis_text(on =TRUE, size=22)+
yaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/proportion_zero_M1.rstanarm.pdf", width=8, height=7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# PPC test statistics: compare the observed mean/sd/median/max of deaths with
# their posterior-predictive distributions
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="mean")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/mean_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="sd")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/sd_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="median")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/median_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="max")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/max_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Standardized residuals against the posterior-predictive mean
mean_y_rep <- colMeans(y_rep)
std_resid <- (M1.rstanarm$y - mean_y_rep) / sqrt(mean_y_rep)
# qplot() is deprecated since ggplot2 3.4.0 (it emitted the warning seen below);
# build the equivalent scatter with ggplot() instead.
ggplot(data.frame(mean_y_rep, std_resid), aes(x = mean_y_rep, y = std_resid)) +
  geom_point() +
  hline_at(2) + hline_at(-2) + # reference lines at +-2 standardized residuals
  labs(x = "Mean of y_rep", y = "Standardized residuals") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16))
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggsave(file="images/standardized_residuals_M1.rstanarm.pdf", width =8, height =7) # save the residual scatter
# Predictive intervals per observation
ppc_intervals(
y = M1.rstanarm$y,
yrep = y_rep) +
# NOTE(review): ppc_intervals puts the observation index on x and the outcome
# on y by default, so these axis labels look misleading - confirm intent.
labs(x = "Deaths", y = "Expected deaths")+
xaxis_text(on =TRUE, size=22)+
yaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text(size=20),
axis.title.y = element_text(size=20))

ggsave(file="images/predictive_intervals_M1.rstanarm.pdf", width =8, height =7)
# Stan model of model A of the paper
# It is a variance components model with UVBI included in the fixed part of the model and
# random terms s_k, u_jk, e_ijk associated respectively with the intercept at level 3, 2, 1
# s_k ~ normal(0, sigma_s)
# u_jk ~ normal(0, sigma_u)
# Create UVB index summary statistics as in Table III of the paper.
# NOTE(review): unique() returns nations in order of first appearance in
# Mmmec; the per-nation statistics below assume exactly that order - verify
# against the paper's table. (Redundant c() wrapper around unique() removed.)
uvbi_params <- data.frame(
  nation = unique(Mmmec$nation),
  mean_UVBI = c(12.70, 12.79, 9.96, 17.18, 10.91, 21.45, 10.54, 13.26, 11.40),
  sd_UVBI = c(0.29, 1.35, 0.38, 2.59, 1.50, 3.51, 0.60, 0.05, 0.47),
  min_UVBI = c(12.17, 10.45, 9.47, 12.92, 6.69, 16.83, 9.64, 13.22, 10.62),
  max_UVBI = c(13.10, 15.15, 10.49, 23.24, 13.46, 28.95, 11.70, 13.31, 11.94)
)
# Join the per-nation UVBI parameters onto the county-level dataframe
Mmmec2 <- left_join(Mmmec, uvbi_params, by = "nation")
# Generate UVBI values for each county from mean_UVBI and sd_UVBI, truncated to [min_UVBI, max_UVBI]
# NOTE(review): rtruncnorm draws are random and no seed is set here, so UVBI
# (and every downstream Stan fit) changes between runs - consider set.seed().
Mmmec2 <- Mmmec2 %>%
mutate(UVBI = rtruncnorm(n(), a = min_UVBI, b = max_UVBI, mean = mean_UVBI, sd = sd_UVBI))
# Prepare data for the Stan model
stan_data <- list(
N = nrow(Mmmec2), # number of county-level observations
deaths = Mmmec2$deaths, # outcome counts
expected = Mmmec2$expected, # expected deaths per county (offset)
K = length(unique(Mmmec2$nation)), # number of nations (level-3 groups)
J = length(unique(Mmmec2$region)), # number of regions (level-2 groups)
k = as.integer(as.factor(Mmmec2$nation)), # nation index per observation
j = as.integer(as.factor(Mmmec2$region)), # region index per observation
UVBI = Mmmec2$UVBI # simulated UVB index covariate
)
# Compile once, then sample; seed fixes the HMC draws for reproducibility
comp_model_A <- stan_model('poisson_regression.stan')
fit_model_A <- sampling(comp_model_A, data = stan_data, seed = 123, iter=4000)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 8.2e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.82 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1: Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 1: Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1: Iteration: 1200 / 4000 [ 30%] (Warmup)
## Chain 1: Iteration: 1600 / 4000 [ 40%] (Warmup)
## Chain 1: Iteration: 2000 / 4000 [ 50%] (Warmup)
## Chain 1: Iteration: 2001 / 4000 [ 50%] (Sampling)
## Chain 1: Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 1: Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 1: Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 1: Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 1: Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 6.338 seconds (Warm-up)
## Chain 1: 5.709 seconds (Sampling)
## Chain 1: 12.047 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 2.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2: Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2: Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2: Iteration: 1200 / 4000 [ 30%] (Warmup)
## Chain 2: Iteration: 1600 / 4000 [ 40%] (Warmup)
## Chain 2: Iteration: 2000 / 4000 [ 50%] (Warmup)
## Chain 2: Iteration: 2001 / 4000 [ 50%] (Sampling)
## Chain 2: Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 2: Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 2: Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 2: Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 2: Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 6.807 seconds (Warm-up)
## Chain 2: 6.532 seconds (Sampling)
## Chain 2: 13.339 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 2.8e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3: Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3: Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3: Iteration: 1200 / 4000 [ 30%] (Warmup)
## Chain 3: Iteration: 1600 / 4000 [ 40%] (Warmup)
## Chain 3: Iteration: 2000 / 4000 [ 50%] (Warmup)
## Chain 3: Iteration: 2001 / 4000 [ 50%] (Sampling)
## Chain 3: Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 3: Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 3: Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 3: Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 3: Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 6.423 seconds (Warm-up)
## Chain 3: 6.035 seconds (Sampling)
## Chain 3: 12.458 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 2.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4: Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4: Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4: Iteration: 1200 / 4000 [ 30%] (Warmup)
## Chain 4: Iteration: 1600 / 4000 [ 40%] (Warmup)
## Chain 4: Iteration: 2000 / 4000 [ 50%] (Warmup)
## Chain 4: Iteration: 2001 / 4000 [ 50%] (Sampling)
## Chain 4: Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 4: Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 4: Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 4: Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 4: Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 5.654 seconds (Warm-up)
## Chain 4: 6.049 seconds (Sampling)
## Chain 4: 11.703 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
print(fit_model_A, pars = c('beta0','beta1','sigma_s','sigma_u','sigma_e'))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=4000; warmup=2000; thin=1;
## post-warmup draws per chain=2000, total post-warmup draws=8000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta0 -0.07 0 0.21 -0.47 -0.21 -0.07 0.07 0.33 1911 1.00
## beta1 0.00 0 0.01 -0.01 0.00 0.00 0.01 0.02 6244 1.00
## sigma_s 0.50 0 0.16 0.28 0.40 0.47 0.58 0.90 4427 1.00
## sigma_u 0.23 0 0.03 0.18 0.21 0.23 0.25 0.29 2729 1.00
## sigma_e 0.12 0 0.02 0.08 0.10 0.12 0.13 0.16 287 1.01
##
## Samples were drawn using NUTS(diag_e) at Sun Jun 15 18:53:45 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
# Posterior predictive draws: one row per post-warmup draw, one column per observation
y_rep_model_A <- as.matrix(fit_model_A, pars = "y_rep")
# Checks due to warnings
check_divergences(fit_model_A)
## 0 of 8000 iterations ended with a divergence.
check_treedepth(fit_model_A)
## 0 of 8000 iterations saturated the maximum tree depth of 10.
# Energy check reproduces the low E-BFMI warning reported during sampling
check_energy(fit_model_A)
## E-BFMI indicated possible pathological behavior:
## Chain 1: E-BFMI = 0.146
## Chain 2: E-BFMI = 0.140
## Chain 3: E-BFMI = 0.142
## Chain 4: E-BFMI = 0.104
## E-BFMI below 0.2 indicates you may need to reparameterize your model.
# Bayesian Fraction of Missing Information is in fact low
# but reparametrizing the model would make it different from the paper's
# The warning about bulk and tail ESS too low disappears with 8000 iterations instead of 4000
# but since posterior predictive checks do not improve significantly, it is unnecessary to use 8000
# NOTE(review): summarise_draws() and as_draws_array() come from the
# 'posterior' package, which is not attached in the library calls shown at the
# top of the file — confirm it is loaded earlier in the script.
ess <- summarise_draws(as_draws_array(fit_model_A))
# Columns 8-10 of the summary hold rhat, ess_bulk and ess_tail (see output below)
print(ess[1:5,c(1, 8, 9, 10)])
## # A tibble: 5 × 4
## variable rhat ess_bulk ess_tail
## <chr> <dbl> <dbl> <dbl>
## 1 beta0 1.00 1992. 2630.
## 2 beta1 1.00 6337. 5855.
## 3 sigma_s 1.00 5310. 4764.
## 4 sigma_u 1.00 2671. 5089.
## 5 sigma_e 1.01 287. 420.
# For sigma_e with iter=8000: ess_bulk=547, ess_tail=927, Rhat=1.00
# Posterior predictive checks
# Traceplot of the 4 chains to see if they mix well: sigma_e is in fact not as good as the others
traceplot(fit_model_A, pars = c('beta0', 'beta1', 'sigma_s', 'sigma_u', 'sigma_e'))

# ggsave() with no plot argument saves the last plot displayed
ggsave(file="images/traceplot.pdf", width=8, height=7)
# densities
# Overlay the observed death-count density on 200 posterior predictive draws
ppc_dens_overlay(y = stan_data$deaths, y_rep_model_A[1:200,])+
xaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))

ggsave(file="images/densities_model_A.pdf", width=8, height=7)
# empirical distribution function
# Same check as above on the ECDF scale, again using 200 predictive draws
ppc_ecdf_overlay(y = stan_data$deaths, y_rep_model_A[1:200,])+
xaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))

ggsave(file="images/ecdf_model_A.pdf", width=8, height=7)
# proportion of zero
prop_zero <- function(x) mean(x == 0)
# Compare the observed proportion of zeros to its posterior predictive distribution
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat = "prop_zero")+
xaxis_text(on =TRUE, size=22)+
yaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/proportion_zero_model_A.pdf", width=8, height=7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# statistics
# Posterior predictive checks on summary statistics: the observed value should
# fall inside the distribution of the statistic computed over the y_rep draws
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="mean")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/mean_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Standard deviation check
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="sd")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/sd_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Median check
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="median")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/median_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Maximum check (sensitive to tail behavior of the predictive distribution)
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat="max")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/max_model_A.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# standardized residuals
# Observed counts minus the posterior predictive mean, scaled by sqrt of that
# mean (Poisson variance approximation); points outside +/-2 flag poor fit.
mean_y_rep_model_A <- colMeans(y_rep_model_A)
std_resid <- (stan_data$deaths - mean_y_rep_model_A) / sqrt(mean_y_rep_model_A)
# qplot() is deprecated since ggplot2 3.4.0 — use ggplot() + geom_point()
ggplot(
  data.frame(mean_y_rep = mean_y_rep_model_A, std_resid = std_resid),
  aes(x = mean_y_rep, y = std_resid)
) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x = "Mean of y_rep", y = "Standardized residuals") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16))

ggsave(file="images/standardized_residuals_model_A.pdf", width =8, height =7)
# predictive intervals
# Predictive intervals of deaths plotted against the UVBI covariate
ppc_intervals(
  y = stan_data$deaths,
  yrep = y_rep_model_A,
  # BUG FIX: stan_data has no element 'uvb' ($-matching is case-sensitive,
  # so stan_data$uvb silently returned NULL); the covariate is 'UVBI'
  x = stan_data$UVBI
) +
  # Labels corrected to match the axes: x is the UVBI covariate, y the counts
  labs(x = "UVBI", y = "Deaths") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20))

ggsave(file="images/predictive_intervals_model_A.pdf", width =8, height =7)
# Extract Leave-One-Out Cross-Validation
# Keep chains separate so relative_eff() can supply r_eff to loo(); with the
# default merged draws, loo() assumes independent draws (r_eff = 1) and the
# MCSE/ESS estimates are too optimistic — the printed output below is from
# the original r_eff = 1 run.
# NOTE(review): extract_log_lik(), relative_eff() and loo() come from the
# 'loo' package — confirm it is attached earlier in the script.
log_lik_A <- extract_log_lik(fit_model_A, merge_chains = FALSE) # pointwise log-likelihood, draws x chains x obs
r_eff_A <- relative_eff(exp(log_lik_A))
loo_A <- loo(log_lik_A, r_eff = r_eff_A)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
print(loo_A)
##
## Computed from 8000 by 354 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -1054.2 21.3
## p_loo 112.6 9.5
## looic 2108.3 42.6
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume independent draws (r_eff=1).
##
## Pareto k diagnostic values:
## Count Pct. Min. ESS
## (-Inf, 0.7] (good) 317 89.5% 248
## (0.7, 1] (bad) 35 9.9% <NA>
## (1, Inf) (very bad) 2 0.6% <NA>
## See help('pareto-k-diagnostic') for details.